Return to Data Visualisation Section
The data comes from the Tidy Tuesday project launched by the R for Data Science team. It contains state-level salary information on registered nurses over the period 1998 to 2020.
# Average nurse salary per year, drawn as a line with a shaded ribbon.
# For each year the salary average and the 10th / 90th percentile columns are
# averaged over all rows of that year (missing values are dropped).
# NOTE(review): the ribbon therefore spans the mean 10th to the mean 90th
# percentile; the subtitle calls these "Confidence Bands" -- confirm wording.
nurses %>%
  group_by(year) %>%
  summarise(
    mean_salary = mean(annual.salary.avg, na.rm = TRUE),
    higher_band = mean(annual.90th.percentile, na.rm = TRUE),
    lower_band = mean(annual.10th.percentile, na.rm = TRUE)
  ) %>%
  ggplot(mapping = aes(x = year, y = mean_salary)) +
  geom_line(colour = "dodgerblue", size = 1) +
  geom_ribbon(
    mapping = aes(ymin = lower_band, ymax = higher_band),
    fill = "dodgerblue",
    colour = "dodgerblue",
    alpha = 0.5,
    linetype = "dashed"
  ) +
  # Make sure zero is part of the y range.
  expand_limits(y = 0) +
  # Print years as plain integers (no thousands separator).
  scale_x_continuous(
    labels = scales::number_format(accuracy = 1, big.mark = "")
  ) +
  scale_y_continuous(labels = scales::dollar_format()) +
  labs(
    title = "Average Salaries Over Time",
    subtitle = "Confidence Bands showing 10th and 90th percentiles",
    x = NULL,
    y = "Salary"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold", size = 12),
    plot.subtitle = element_text(face = "italic", colour = "grey50"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank()
  )
# Year-over-year growth of the average nurse salary, shown as bars.
# The per-year mean of `annual.salary.avg` is divided by the previous row's
# mean; rows without a computable change (such as the first year) are dropped.
nurses %>%
  group_by(year) %>%
  summarise(mean_salary = mean(annual.salary.avg, na.rm = TRUE)) %>%
  # lag() works on row order, so sort explicitly rather than relying on the
  # ordering produced by the grouped summary.
  arrange(year) %>%
  mutate(change = mean_salary / lag(mean_salary) - 1) %>%
  filter(!is.na(change)) %>%
  ggplot(aes(year, change)) +
  geom_col(fill = "dodgerblue") +
  labs(
    title = "Yearly Salary Growth For Registered Nurses",
    subtitle = "Growth calculated on average salaries across states",
    x = NULL,
    y = "YoY Change in Salaries"
  ) +
  # Print years as plain integers (no thousands separator).
  scale_x_continuous(
    labels = scales::number_format(accuracy = 1, big.mark = "")
  ) +
  scale_y_continuous(labels = scales::percent_format()) +
  expand_limits(y = 0) +
  theme_bw() +
  theme(
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_blank(),
    plot.title = element_text(face = "bold", size = 12),
    plot.subtitle = element_text(face = "italic", colour = "grey50")
  )
# Year-over-year employment growth: healthcare versus the aggregate figure,
# shown as dodged bars.
# Both employment columns are averaged per year, then each yearly value is
# divided by the previous row's value to obtain the growth rate.
nurses %>%
  group_by(year) %>%
  summarise(
    total_employment = mean(total.employed..national._aggregate,
                            na.rm = TRUE),
    total_employment_healthcare = mean(
      total.employed..healthcare..national._aggregate,
      na.rm = TRUE
    )
  ) %>%
  # lag() works on row order, so sort explicitly rather than relying on the
  # ordering produced by the grouped summary.
  arrange(year) %>%
  mutate(
    total_growth = total_employment / lag(total_employment) - 1,
    healthcare_growth =
      total_employment_healthcare / lag(total_employment_healthcare) - 1
  ) %>%
  select(year, total_growth, healthcare_growth) %>%
  filter(!is.na(total_growth)) %>%
  # The column names become the legend entries after pivoting.
  rename("Healthcare" = healthcare_growth,
         "Aggregate Economy" = total_growth) %>%
  pivot_longer(-c(year)) %>%
  ggplot(aes(year, value, fill = name)) +
  geom_col(position = "dodge") +
  labs(
    title = "YoY Employment Growth: Healthcare vs. Aggregate",
    subtitle = "Growth calculated on aggregate values",
    x = NULL,
    y = "YoY Change in Employment",
    fill = NULL
  ) +
  # Print years as plain integers (no thousands separator).
  scale_x_continuous(
    labels = scales::number_format(accuracy = 1, big.mark = "")
  ) +
  scale_y_continuous(labels = scales::percent_format()) +
  # Colours are matched by name so the assignment does not depend on the
  # alphabetical order of the series labels.
  scale_fill_manual(
    values = c("Aggregate Economy" = "midnightblue",
               "Healthcare" = "firebrick")
  ) +
  expand_limits(y = 0) +
  theme_bw() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    plot.title = element_text(face = "bold", size = 12),
    plot.subtitle = element_text(face = "italic", colour = "grey50")
  )
# Total number of employed registered nurses per year (sum of
# `total.employed.rn` over all rows of a year), drawn as a line with points.
# No fill aesthetic is mapped, so no fill scale or fill label is needed.
nurses %>%
  group_by(year) %>%
  summarise(nurses = sum(total.employed.rn, na.rm = TRUE)) %>%
  ggplot(aes(year, nurses)) +
  geom_line(colour = "dodgerblue") +
  geom_point(colour = "dodgerblue") +
  # Make sure zero is part of the y range.
  expand_limits(y = 0) +
  labs(
    title = "Total Employed Registered Nurses In The US",
    subtitle = "Data from Data.World",
    x = NULL,
    y = NULL
  ) +
  # Print years as plain integers (no thousands separator).
  scale_x_continuous(
    labels = scales::number_format(accuracy = 1, big.mark = "")
  ) +
  scale_y_continuous(labels = scales::comma_format()) +
  theme_bw() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    plot.title = element_text(face = "bold", size = 12),
    plot.subtitle = element_text(face = "italic", colour = "grey50")
  )
# Horizontal bar chart of the 2020 median annual nurse salary per state.
# `state` is turned into a factor ordered by that salary so the bars are
# sorted along the y axis.
nurses %>%
  filter(year == 2020) %>%
  select(state, annual.salary.median) %>%
  mutate(
    state = fct_reorder(.f = state, .x = annual.salary.median)
  ) %>%
  ggplot(mapping = aes(x = annual.salary.median, y = state)) +
  geom_col(fill = "dodgerblue", colour = "white") +
  scale_x_continuous(labels = scales::dollar_format()) +
  labs(
    title = "Nurse Salaries By US States",
    subtitle = "Data from Data.World",
    x = "Median Salary",
    y = NULL,
    fill = NULL
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold", size = 12),
    plot.subtitle = element_text(face = "italic", colour = "grey50"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank()
  )
# Map of the 2020 median hourly nurse wage, with each state polygon filled
# by `hourly.wage.median`.
# State names are lower-cased so they can be joined to the `region` column of
# map_data("state"); the inner join keeps only states found in both tables.
nurses %>%
  # Compare against a number, consistent with the other 2020 filter in this
  # script, instead of coercing the column to character.
  filter(year == 2020) %>%
  mutate(state = tolower(state)) %>%
  inner_join(map_data("state"), by = c(state = "region")) %>%
  ggplot(aes(long, lat, group = group, fill = hourly.wage.median)) +
  geom_polygon() +
  coord_map() +
  labs(
    title = "Nurse Hourly Median Wages By US States",
    subtitle = "Data from Data.World",
    fill = "Hourly Median Wage"
  ) +
  scale_fill_gradient(
    low = "midnightblue",
    high = "dodgerblue",
    labels = scales::dollar_format()
  ) +
  theme_void() +
  theme(
    plot.title = element_text(face = "bold", size = 12),
    plot.subtitle = element_text(face = "italic", colour = "grey50")
  )
A work by Mathias Steilen